package com.wxsh.util;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.FileUtils;

/**
 * Splits the lines of a text file into fragments at a configurable list of
 * separators and writes the fragments, one per line, to another file.
 *
 * <p>Every separator is a regular expression. The text matched by a separator
 * is kept in the output as a fragment of its own, so no input characters are
 * dropped; only empty fragments are discarded.
 *
 * <p>The separator list is held in a static field: it is loaded from a default
 * file when the class is initialised and can be replaced with
 * {@link #setSeparators(List)}.
 */
public class StorySplit {

	/** Charset name used for every file this class reads or writes. */
	private static final String ENCODING = "UTF-8";

	/** File the default separator list is loaded from, one regular expression per line. */
	private static final String DEFAULT_SEPARATOR_FILE = "C:\\Users\\Administrator\\git\\HanyuLearn\\separators4English.txt";
	// For Pinyin text, separators4Pinyin.txt from the same directory was used here instead;
	// load it and pass the result to setSeparators(List) to get the same effect.

	/** Input file used by the no-argument {@link #splitByChar()}. */
	private static final String DEFAULT_SOURCE_FILE = "D:\\english.txt";

	/** Output file used by the no-argument {@link #splitByChar()}. */
	private static final String DEFAULT_TARGET_FILE = "D:\\english_split.txt";

	/** Active separators (regular expressions); stays {@code null} if the default file could not be read. */
	private static List<String> separators;

	static {
		try {
			separators = FileUtils.readLines(new File(DEFAULT_SEPARATOR_FILE), ENCODING);
		} catch (IOException e) {
			// Best effort: the class must still load so that callers can supply
			// their own list through setSeparators(List). splitByChar reports the
			// missing list explicitly instead of failing with a NullPointerException.
			System.err.println("StorySplit: could not load default separators from " + DEFAULT_SEPARATOR_FILE);
			e.printStackTrace();
		}
	}

	/**
	 * General-purpose split based on plain separator characters (the recommended
	 * entry point). Reads {@code D:\english.txt} and writes the fragments to
	 * {@code D:\english_split.txt}.
	 *
	 * @throws IOException if the input cannot be read or the output cannot be written
	 * @throws IllegalStateException if no separator list is available
	 */
	public static void splitByChar() throws IOException {
		splitByChar(new File(DEFAULT_SOURCE_FILE), new File(DEFAULT_TARGET_FILE));
	}

	/**
	 * Splits every line of {@code source} at the active separators and writes the
	 * non-empty fragments to {@code target}, one per line, overwriting any
	 * existing content. Both files are treated as UTF-8.
	 *
	 * <p>Separators are applied in list order, each one to the fragments produced
	 * by the previous ones. A fragment emitted for an earlier separator can
	 * therefore be split again by a later separator that matches part of it.
	 *
	 * @param source file to read
	 * @param target file to write
	 * @throws IOException if {@code source} cannot be read or {@code target} cannot be written
	 * @throws IllegalStateException if no separator list is available
	 * @throws java.util.regex.PatternSyntaxException if a separator is not a valid regular expression
	 */
	public static void splitByChar(File source, File target) throws IOException {
		List<String> activeSeparators = separators;
		if (activeSeparators == null) {
			throw new IllegalStateException("No separators available: loading " + DEFAULT_SEPARATOR_FILE
					+ " failed and setSeparators(List) was not called");
		}

		// Compile each separator once per run rather than once per input line.
		List<Pattern> patterns = new ArrayList<Pattern>(activeSeparators.size());
		for (String sep : activeSeparators) {
			if (sep == null || sep.length() == 0) {
				// A blank entry would match between every two characters and
				// shred each line into single characters.
				continue;
			}
			patterns.add(Pattern.compile(sep));
		}

		List<String> lines = FileUtils.readLines(source, ENCODING);
		List<String> destLines = new ArrayList<String>();
		for (String ln : lines) {
			List<String> sentences = new ArrayList<String>();
			sentences.add(ln);
			for (Pattern sep : patterns) {
				sentences = splitStrArr(sep, sentences);
			}
			// Adjacent separators and separators at the start or end of a line
			// leave empty fragments behind; those are not written.
			for (String fragment : sentences) {
				if (fragment.length() > 0) {
					destLines.add(fragment);
				}
			}
		}

		FileUtils.writeLines(target, ENCODING, destLines, false);
	}

	/**
	 * Splits each string in {@code lines} at every match of {@code sep}, keeping
	 * the matched text as its own element between the surrounding pieces.
	 *
	 * <p>Empty pieces (before a leading match, after a trailing match, between
	 * adjacent matches) are kept here; the caller filters them out.
	 *
	 * @param sep   compiled separator
	 * @param lines strings to split
	 * @return a new list with the pieces and matched separators in their original order
	 */
	private static List<String> splitStrArr(Pattern sep, List<String> lines) {
		List<String> rtnList = new ArrayList<String>();
		String rawSep = sep.pattern();
		for (String ln : lines) {
			if (ln.equals(rawSep)) {
				// The element is textually the separator expression itself: pass it through unsplit.
				rtnList.add(ln);
				continue;
			}
			Matcher m = sep.matcher(ln);
			int pieceStart = 0;
			while (m.find()) {
				rtnList.add(ln.substring(pieceStart, m.start()));
				// Emit what was actually matched, so escaped or multi-character
				// expressions reproduce the input text exactly.
				rtnList.add(m.group());
				pieceStart = m.end();
			}
			rtnList.add(ln.substring(pieceStart));
		}
		return rtnList;
	}

	/**
	 * Replaces the active separator list.
	 *
	 * @param separators regular expressions to split at, applied in list order;
	 *                   the list is stored as given, not copied
	 */
	public static void setSeparators(List<String> separators) {
		StorySplit.separators = separators;
	}

	/**
	 * Runs {@link #splitByChar()} with the default input and output files.
	 *
	 * @param args ignored
	 * @throws IOException if the input cannot be read or the output cannot be written
	 */
	public static void main(String[] args) throws IOException {
		splitByChar();
	}
}
